#Alexander F. Gazmararian
#afg2@princeton.edu
#January 9, 2024

###DO NOT RUN
###Without first requesting access to elections data

###Presidential election data are restricted
###But under the terms of the access and copyright agreement
###   I can share them with you directly. To do so, email me at: 
###   afg2@princeton.edu  --or--  agazmararian@gmail.com

###Copyright and access info: https://library.princeton.edu/resource/title/atlas-us-presidential-elections

###The code below is provided for transparency about the transformations made to the data

#Load packages
library(tidyverse)
library(janitor)
library(readxl)
library(tidycensus)

#Load data here to g
#(no path is passed to read_xlsx(); point it at the elections workbook
# obtained under the access agreement described at the top of this file)
g <- read_xlsx()
#Keep the race date, the state/area identifiers and the vote columns,
#derive the election year from the first four characters of RaceDate,
#drop the raw RaceDate column, and subset to recent elections (1960 onward)
g <- g %>%
  select(RaceDate, State, Area, TotalVotes:DemVotesMajorPercent) %>%
  mutate(year = as.numeric(substr(RaceDate, 1, 4))) %>%
  select(-RaceDate) %>%
  filter(year >= 1960)
#fix county names and get fips codes
##FIPS lookup table shipped with tidycensus
fips <- tidycensus::fips_codes
##drop Alaska, the District of Columbia and the territories
##(rows with a missing state_name are dropped too, as subset() on a chain of
## `!=` comparisons would do)
excluded_states <- c(
  "Alaska", "Puerto Rico", "American Samoa", "Guam",
  "Northern Mariana Islands", "U.S. Minor Outlying Islands",
  "U.S. Virgin Islands", "District of Columbia"
)
fips <- subset(fips, !is.na(state_name) & !(state_name %in% excluded_states))
##normalise names: strip the " County" / " Parish" suffix, then lower-case
fips$county <- tolower(gsub(" Parish", "", gsub(" County", "", fips$county)))
`%notin%` <- Negate(`%in%`)
##In Virginia and Missouri, strip the " city" suffix, except for the names
##listed below, which are left untouched
va_keep_city <- c("bedford city", "charles city", "james city", "franklin city",
                  "fairfax city", "richmond city", "roanoke city")
mo_keep_city <- c("kansas city", "st. louis city")
strip_city <- with(
  fips,
  (state_name == "Virginia" & county %notin% va_keep_city) |
    (state_name == "Missouri" & county %notin% mo_keep_city)
)
fips$county[strip_city] <- gsub(" city", "", fips$county[strip_city])
##manual respellings, keyed by "<state abbreviation>|<county name>"
##(NOTE(review): presumably these match the spellings used in the elections
## data -- confirm against the source file)
respell <- c(
  "MS|desoto"          = "de soto",
  "TX|dewitt"          = "de witt",
  "IL|dupage"          = "du page",
  "ND|lamoure"         = "la moure",
  "IN|laporte"         = "la porte",
  "IL|lasalle"         = "la salle",
  "MD|prince george's" = "prince georges",
  "MD|queen anne's"    = "queen annes",
  "MD|st. mary's"      = "st. marys"
)
##Look the keys up in one pass. Unlike `fips[cond, ]$county <- value`, which
##stops with "replacement has 1 row, data has 0" when cond selects no rows,
##a key that is absent from the table is simply skipped.
respell_pos <- match(paste(fips$state, fips$county, sep = "|"), names(respell))
respell_hit <- !is.na(respell_pos)
fips$county[respell_hit] <- unname(respell[respell_pos[respell_hit]])
##prepare county names for merge: lower-case to line up with fips$county
g$Area <- tolower(g$Area)
##conduct initial merge
##first drop Alaska, the District of Columbia, and the listed non-county
##tallies; rows with a missing State or Area are dropped as well
non_county_areas <- c(
  "votes not reported by county", "special ballots", "special absentee",
  "federal absentees", "federal ballots", "overseas vote"
)
keep_row <- !is.na(g$State) & !is.na(g$Area) &
  !(g$State %in% c("Alaska", "District of Columbia")) &
  !(g$Area %in% non_county_areas)
g <- g[keep_row, , drop = FALSE]
##one row per distinct (State, Area) pair, with the fips columns attached
##where a name match exists (unmatched pairs are kept with NA codes)
gm <- g[, c("State", "Area")] %>%
  unique() %>%
  merge(fips, by.x = c("Area", "State"), by.y = c("county", "state_name"), all.x = TRUE)
##merge the looked-up codes back onto the full vote table
g <- merge(g, gm, by = c("Area", "State"))
#Rename variables
#(the merged-in two-letter `state` column becomes `state_abb`, and the full
# state name in `State` takes over the name `state`)
g <- rename(g, county = Area, state = State, state_abb = state)
#Construct FIPS codes: paste state and county codes together and convert to
#numeric (so leading zeros are not retained); rows whose codes are NA produce
#a non-numeric string and therefore an NA, with a coercion warning
g$fips <- as.numeric(paste0(g$state_code, g$county_code))
#Remove observations without FIPS codes
g <- g[!is.na(g$fips), , drop = FALSE]
#Drop variables that were only needed to build the FIPS code
g <- g[setdiff(names(g), c("state_code", "county_code"))]
#fix county fips codes that change over time: elections held before the
#change are given the earlier code
#which() makes each recode a no-op when no rows match; the form
#`g[cond, ]$fips <- value` stops with an error in that case
#Florida, 1997: Dade county (FIPS 12025) is renamed as Miami-Dade county (FIPS 12086).
g$fips[which(g$fips == 12086 & g$year < 1997)] <- 12025
#Missouri, 1979: code 29186 is recoded to 29193 for earlier years
#(NOTE(review): believed to be the Ste. Genevieve county code change -- confirm)
g$fips[which(g$fips == 29186 & g$year < 1979)] <- 29193
#clean last entries: make sure the vote-count columns are numeric
vote_cols <- c("RepVotes", "DemVotes", "TotalVotes", "ThirdVotes", "OtherVotes")
g <- g %>% mutate(across(all_of(vote_cols), as.numeric))
#keep elections after 1968
g <- filter(g, year > 1968)
#save data
#here() is namespace-qualified because this script never attaches the here
#package, so a bare here() call would not be found
saveRDS(g, here::here("data", "inter", "pres_elec.rds"))
